Load libraries

This project uses renv to keep track of installed packages. Install renv if not installed and load dependencies with renv::restore().

install.packages("renv")
renv::restore()
library(readr)
library(dplyr)
library(tidyr)
library(reshape2)
library(GenomicRanges)
library(pheatmap)
library(tibble)
library(ggplot2)
library(stringr)
library(cowplot)
library(markdown)
library(RColorBrewer)
library(GenomicAlignments)
library(reshape2)

Read data

  1. Get list of samples
samples <- read_tsv("config/samples.tsv", show_col_types = FALSE)
units <- read_tsv("config/units.tsv", show_col_types = FALSE)
sample_units <- dplyr::left_join(samples, units, by = "sample_name") %>%
  unite(sample_unit, sample_name, unit_name, remove = FALSE)
sample_units
  1. Read Samtools idxstats to get human coverage for normalization

Notes:

idxstats_exogenousrna_dir <-
  "results/samtools_idxstats/exogenous_rna/"

idxstats_human_dir <-
  "results/samtools_idxstats/Homo_sapiens.GRCh38.dna.primary_assembly/"

bowtie2_human_logs <-
  "results/logs/bowtie2/Homo_sapiens.GRCh38.dna.primary_assembly/"

idxstats <- tibble()

for (row in seq_len(nrow(sample_units))) {
  sample <- sample_units[row, ]$sample_unit

  # Read `idsxstats` for exogenous mapped reads
  exogenous_rna_stats <- read_tsv(
    file.path(idxstats_exogenousrna_dir, sprintf("%s.bam.idxstats", sample)),
    col_names = c(
      "sequence_name", "sequence_length",
      "mapped_reads", "unmapped_reads"
    ),
    col_types = "ciii"
  )
  exogenous_rna_mapped_reads <- exogenous_rna_stats %>%
    filter(!sequence_name %in% c("*")) %>%
    select(sequence_name, mapped_reads) %>%
    mutate(sample = sample)

  # Read `idxstats` for human mapped reads
  human_stats <- read_tsv(
    file.path(idxstats_human_dir, sprintf("%s.bam.idxstats", sample)),
    col_names = c(
      "sequence_name", "sequence_length",
      "mapped_reads", "unmapped_reads"
    ),
    col_types = "ciii"
  )
  grch38_mapped_reads <- human_stats %>%
    filter(!sequence_name %in% c("*")) %>%
    select(mapped_reads) %>%
    sum()
  grch38_mapped_reads <- tibble(
    sequence_name = "grch38_mapped_reads",
    mapped_reads = grch38_mapped_reads,
    sample = sample
  )

  # Read bowtie2 logs for unmapped reads
  bowtie2_log <- readLines(
    file.path(bowtie2_human_logs, sprintf("%s.log", sample))
  )
  total_pairs <- strtoi(str_split(bowtie2_log[1], " ")[[1]][1])
  total_reads <- total_pairs * 2
  unmapped_reads <- tibble(
    sequence_name = "unmapped",
    mapped_reads = total_reads - grch38_mapped_reads$mapped_reads,
    sample = sample
  )

  # Consolidate counts for rows
  idxstats <- rbind(
    idxstats,
    exogenous_rna_mapped_reads,
    grch38_mapped_reads,
    unmapped_reads
  )
}
idxstats
  1. Read bedpe files to get exogenous rna coverage of paired reads
bedpe_data <- tibble()
for (sample in sample_units$sample_unit) {
  data <-
    readr::read_tsv(
      sprintf(
        "results/alignments/exogenous_rna/bedpe/%s.bedpe", sample
      ),
      col_names = c(
        "chrom1", "chrom1Start", "chrom1End",
        "chrom2", "chrom2Start", "chrom2End",
        "name", "score", "strand1", "strand2"
      ),
      col_types = "ciiciicicc"
    )
  bedpe_data <- tibble(rbind(
    bedpe_data,
    cbind(
      sample = sample,
      data
    )
  ))
}
bedpe_data

Coverage

Concordant vs Discordant paired reads

Concordant pairs are pairs of reads that:

  • Align on the same pegRNA
  • Align within 500 bp of each other
  • Align in the expected forward-reverse orientation (--> .. <--)

Discordant reads aligned but whose mate:

  • Did not align (on the pegRNA)
  • Aligned more than 500 bp away
  • Aligned in an unexpected orientation
## Config and function definition

bam_dir <- "results/alignments/exogenous_rna/sorted"

last_day <- 0
cols <- brewer.pal(n = 5, name = "RdBu")

concordant_cell_line_colors <- list(
  "Parental" = "#CA0020",
  "P1E10" = "#0571B0"
)

discordant_cell_line_colors <- list(
  "Parental" = "#F4A582",
  "P1E10" = "#92C5DE"
)

# Exogenous RNA mixtures
rna_mixes <- tibble()
for (mix in c("mastermix1", "mastermix2")) {
  t <- readDNAStringSet(sprintf("data/references/%s.fa", mix))
  rna_mixes <- rbind(rna_mixes, tibble(
    exogenous_rna = mix,
    rna_species = word(t@ranges@NAMES, 1),
    length = t@ranges@width
  ))
}

get_pegrna_plot_data <- function(sequence_name,
                                 normalization_factor,
                                 mix = NA) {
  samples <- sample_units %>%
    full_join(rna_mixes, by = "exogenous_rna") %>%
    arrange(.data$exogenous_rna, day, cell_line) %>%
    filter(rna_species == sequence_name)

  if (!is.na(mix)) {
    samples <- samples %>% filter(.data$exogenous_rna == mix)
  }
  samples <- samples %>% pull(.data$sample_unit)

  plot_data <- list()

  for (s in samples) {
    if (normalization_factor == "exogenous_rna_mapped_reads") {
      norm_seqname <- sequence_name
    } else if (normalization_factor == "grch38_mapped_reads") {
      norm_seqname <- "grch38_mapped_reads"
    }
    normalization_read_count <- idxstats %>%
      filter(sample == s) %>%
      filter(sequence_name == norm_seqname) %>%
      pull(.data$mapped_reads)

    cell_line <- sample_units %>%
      filter(.data$sample_unit == s) %>%
      pull(cell_line)

    day <- sample_units %>%
      filter(.data$sample_unit == s) %>%
      pull(day)

    which <- GRanges(sprintf("%s:1-%i", sequence_name, rna_mixes %>%
      filter(sequence_name == sequence_name) %>%
      pull(length)))
    param_discordant <- ScanBamParam(
      flag = scanBamFlag(isProperPair = FALSE),
      which = which
    )

    discordant <- readGAlignments(sprintf("%s/%s.bam", bam_dir, s),
      param = param_discordant
    )
    cov_discordant <- coverage(discordant)
    cov_discordant_norm <- cov_discordant / normalization_read_count

    which <- GRanges(sprintf("%s:1-%i", sequence_name, rna_mixes %>%
      filter(sequence_name == sequence_name) %>%
      pull(length)))
    param_concordant <- ScanBamParam( # nolint
      flag = scanBamFlag(isProperPair = TRUE),
      which = which
    )

    df_complete <- bedpe_data %>%
      filter(sample == s) %>%
      filter(.data$chrom1 == sequence_name)
    concordant <- GRanges(
      ranges = IRanges(
        start = df_complete$chrom1Start,
        end = df_complete$chrom2End
      ),
      seqnames = df_complete$chrom1
    )
    cov_concordant <- coverage(concordant)
    cov_concordant_norm <- cov_concordant / normalization_read_count

    plot_data[[s]] <- list(
      cell_line = cell_line,
      day = day,
      discordant = cov_discordant_norm,
      concordant = cov_concordant_norm
    )
  }
  return(plot_data)
}

pegrna_plots <- function(sequence_name,
                         normalization_factor,
                         mix = NA,
                         ylim = NA,
                         ylab,
                         vlines = NA) {
  plot_data <- get_pegrna_plot_data(
    sequence_name = sequence_name,
    normalization_factor = normalization_factor,
    mix = mix
  )

  # Find largest value for rna_species
  if (is.na(ylim)) {
    m <- 0
    for (s in names(plot_data)) {
      m <- max(
        m,
        max(plot_data[[s]][["concordant"]][[sequence_name]]),
        max(plot_data[[s]][["discordant"]][[sequence_name]])
      )
    }
    ylim <- c(0, m)
  }

  last_day <- 0

  par(mfrow = c(2, 1))

  for (s in names(plot_data)) {
    series_data_concordant <- plot_data[[s]][["concordant"]][[sequence_name]]
    series_data_discordant <- plot_data[[s]][["discordant"]][[sequence_name]]

    day <- plot_data[[s]][["day"]]
    cell_line <- plot_data[[s]][["cell_line"]]
    concordant_color <- concordant_cell_line_colors[[cell_line]]
    discordant_color <- discordant_cell_line_colors[[cell_line]]

    if (day != last_day) {
      plot(series_data_concordant,
        type = "l",
        col = concordant_color,
        ylim = ylim,
        main = sprintf("Day %s", day),
        xlab = sprintf("%s position", sequence_name),
        ylab = ylab
      )
      lines(series_data_discordant,
        type = "l",
        col = discordant_cell_line_colors[[plot_data[[s]][["cell_line"]]]]
      )
    } else {
      lines(series_data_concordant,
        type = "l",
        col = concordant_color
      )
      lines(series_data_discordant,
        type = "l",
        col = discordant_color
      )
    }
    legend("top",
      legend = c(
        "Parental:Concordant",
        "P1E10:Concordant",
        "Parental:Discordant",
        "P1E10:Discordant"
      ),
      col = unlist(c(
        concordant_cell_line_colors,
        discordant_cell_line_colors
      )),
      lty = 1,
      cex = 0.75
    )
    for (vline in vlines) {
      abline(v = vline, lty = 2)
      text(
        x = vline, y = 0, as.character(vline),
        pos = 2, cex = 0.75, lwd = 0.5
      )
    }
    last_day <- day
  }
}

VEGFA

rna_mix_rows <- rna_mixes %>% filter(grepl("VEGFA", rna_species))

for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (row in seq_len(nrow(rna_mix_rows))) {
    rna_species <- rna_mix_rows[[row, "rna_species"]]
    mix <- rna_mix_rows[[row, "exogenous_rna"]]

    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      mix = mix,
      ylab = sprintf("%s coverage (normalized to %s)", mix, norm_factor),
    )
  }
}

FANCF

for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("FANCF", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}

HEK3

for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("HEK3", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}

DNMT1

for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("DNMT1", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}

RUNX1

for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("RUNX1", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}

EMX1

for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("EMX1", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}

RNF2

for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("RNF2", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}

---
title: "Adamson smallRNA - pegRNA"
output:
  html_notebook:
    toc: true
    code_folding: hide
---

## Load libraries

This project uses [`renv`](https://rstudio.github.io/renv/articles/renv.html)
to keep track of installed packages. Install `renv` if not installed and load
dependencies with `renv::restore()`.

```r
install.packages("renv")
renv::restore()
```

```{r message=FALSE}
library(readr)
library(dplyr)
library(tidyr)
library(reshape2)
library(GenomicRanges)
library(pheatmap)
library(tibble)
library(ggplot2)
library(stringr)
library(cowplot)
library(markdown)
library(RColorBrewer)
library(GenomicAlignments)
library(reshape2)
```

## Read data

1. Get list of samples

```{r}
samples <- read_tsv("config/samples.tsv", show_col_types = FALSE)
units <- read_tsv("config/units.tsv", show_col_types = FALSE)
sample_units <- dplyr::left_join(samples, units, by = "sample_name") %>%
  unite(sample_unit, sample_name, unit_name, remove = FALSE)
sample_units
```

2. Read Samtools `idxstats` to get human coverage for normalization

Notes:

* The counts include the total number of reads aligned, they 
  are not limited to uniquely aligned reads.
* The counts are reads, not pairs or fragments

```{r}
idxstats_exogenousrna_dir <-
  "results/samtools_idxstats/exogenous_rna/"

idxstats_human_dir <-
  "results/samtools_idxstats/Homo_sapiens.GRCh38.dna.primary_assembly/"

bowtie2_human_logs <-
  "results/logs/bowtie2/Homo_sapiens.GRCh38.dna.primary_assembly/"

idxstats <- tibble()

for (row in seq_len(nrow(sample_units))) {
  sample <- sample_units[row, ]$sample_unit

  # Read `idsxstats` for exogenous mapped reads
  exogenous_rna_stats <- read_tsv(
    file.path(idxstats_exogenousrna_dir, sprintf("%s.bam.idxstats", sample)),
    col_names = c(
      "sequence_name", "sequence_length",
      "mapped_reads", "unmapped_reads"
    ),
    col_types = "ciii"
  )
  exogenous_rna_mapped_reads <- exogenous_rna_stats %>%
    filter(!sequence_name %in% c("*")) %>%
    select(sequence_name, mapped_reads) %>%
    mutate(sample = sample)

  # Read `idxstats` for human mapped reads
  human_stats <- read_tsv(
    file.path(idxstats_human_dir, sprintf("%s.bam.idxstats", sample)),
    col_names = c(
      "sequence_name", "sequence_length",
      "mapped_reads", "unmapped_reads"
    ),
    col_types = "ciii"
  )
  grch38_mapped_reads <- human_stats %>%
    filter(!sequence_name %in% c("*")) %>%
    select(mapped_reads) %>%
    sum()
  grch38_mapped_reads <- tibble(
    sequence_name = "grch38_mapped_reads",
    mapped_reads = grch38_mapped_reads,
    sample = sample
  )

  # Read bowtie2 logs for unmapped reads
  bowtie2_log <- readLines(
    file.path(bowtie2_human_logs, sprintf("%s.log", sample))
  )
  total_pairs <- strtoi(str_split(bowtie2_log[1], " ")[[1]][1])
  total_reads <- total_pairs * 2
  unmapped_reads <- tibble(
    sequence_name = "unmapped",
    mapped_reads = total_reads - grch38_mapped_reads$mapped_reads,
    sample = sample
  )

  # Consolidate counts for rows
  idxstats <- rbind(
    idxstats,
    exogenous_rna_mapped_reads,
    grch38_mapped_reads,
    unmapped_reads
  )
}
idxstats
```

3. Read `bedpe` files to get exogenous rna coverage of paired reads

```{r}
bedpe_data <- tibble()
for (sample in sample_units$sample_unit) {
  data <-
    readr::read_tsv(
      sprintf(
        "results/alignments/exogenous_rna/bedpe/%s.bedpe", sample
      ),
      col_names = c(
        "chrom1", "chrom1Start", "chrom1End",
        "chrom2", "chrom2Start", "chrom2End",
        "name", "score", "strand1", "strand2"
      ),
      col_types = "ciiciicicc"
    )
  bedpe_data <- tibble(rbind(
    bedpe_data,
    cbind(
      sample = sample,
      data
    )
  ))
}
bedpe_data
```

## Coverage

### Concordant vs Discordant paired reads

Concordant pairs are pairs of reads that:

* Align on the same pegRNA
* Align within 500 bp of each other
* Align in the expected forward-reverse orientation (`--> .. <--`)

Discordant reads aligned but whose mate:

* Did not align (on the pegRNA)
* Aligned more than 500 bp away
* Aligned in an unexpected orientation

```{r fig.width=10, fig.height=10}
## Config and function definition

bam_dir <- "results/alignments/exogenous_rna/sorted"

last_day <- 0
cols <- brewer.pal(n = 5, name = "RdBu")

concordant_cell_line_colors <- list(
  "Parental" = "#CA0020",
  "P1E10" = "#0571B0"
)

discordant_cell_line_colors <- list(
  "Parental" = "#F4A582",
  "P1E10" = "#92C5DE"
)

# Exogenous RNA mixtures
rna_mixes <- tibble()
for (mix in c("mastermix1", "mastermix2")) {
  t <- readDNAStringSet(sprintf("data/references/%s.fa", mix))
  rna_mixes <- rbind(rna_mixes, tibble(
    exogenous_rna = mix,
    rna_species = word(t@ranges@NAMES, 1),
    length = t@ranges@width
  ))
}

get_pegrna_plot_data <- function(sequence_name,
                                 normalization_factor,
                                 mix = NA) {
  samples <- sample_units %>%
    full_join(rna_mixes, by = "exogenous_rna") %>%
    arrange(.data$exogenous_rna, day, cell_line) %>%
    filter(rna_species == sequence_name)

  if (!is.na(mix)) {
    samples <- samples %>% filter(.data$exogenous_rna == mix)
  }
  samples <- samples %>% pull(.data$sample_unit)

  plot_data <- list()

  for (s in samples) {
    if (normalization_factor == "exogenous_rna_mapped_reads") {
      norm_seqname <- sequence_name
    } else if (normalization_factor == "grch38_mapped_reads") {
      norm_seqname <- "grch38_mapped_reads"
    }
    normalization_read_count <- idxstats %>%
      filter(sample == s) %>%
      filter(sequence_name == norm_seqname) %>%
      pull(.data$mapped_reads)

    cell_line <- sample_units %>%
      filter(.data$sample_unit == s) %>%
      pull(cell_line)

    day <- sample_units %>%
      filter(.data$sample_unit == s) %>%
      pull(day)

    which <- GRanges(sprintf("%s:1-%i", sequence_name, rna_mixes %>%
      filter(sequence_name == sequence_name) %>%
      pull(length)))
    param_discordant <- ScanBamParam(
      flag = scanBamFlag(isProperPair = FALSE),
      which = which
    )

    discordant <- readGAlignments(sprintf("%s/%s.bam", bam_dir, s),
      param = param_discordant
    )
    cov_discordant <- coverage(discordant)
    cov_discordant_norm <- cov_discordant / normalization_read_count

    which <- GRanges(sprintf("%s:1-%i", sequence_name, rna_mixes %>%
      filter(sequence_name == sequence_name) %>%
      pull(length)))
    param_concordant <- ScanBamParam( # nolint
      flag = scanBamFlag(isProperPair = TRUE),
      which = which
    )

    df_complete <- bedpe_data %>%
      filter(sample == s) %>%
      filter(.data$chrom1 == sequence_name)
    concordant <- GRanges(
      ranges = IRanges(
        start = df_complete$chrom1Start,
        end = df_complete$chrom2End
      ),
      seqnames = df_complete$chrom1
    )
    cov_concordant <- coverage(concordant)
    cov_concordant_norm <- cov_concordant / normalization_read_count

    plot_data[[s]] <- list(
      cell_line = cell_line,
      day = day,
      discordant = cov_discordant_norm,
      concordant = cov_concordant_norm
    )
  }
  return(plot_data)
}

pegrna_plots <- function(sequence_name,
                         normalization_factor,
                         mix = NA,
                         ylim = NA,
                         ylab,
                         vlines = NA) {
  plot_data <- get_pegrna_plot_data(
    sequence_name = sequence_name,
    normalization_factor = normalization_factor,
    mix = mix
  )

  # Find largest value for rna_species
  if (is.na(ylim)) {
    m <- 0
    for (s in names(plot_data)) {
      m <- max(
        m,
        max(plot_data[[s]][["concordant"]][[sequence_name]]),
        max(plot_data[[s]][["discordant"]][[sequence_name]])
      )
    }
    ylim <- c(0, m)
  }

  last_day <- 0

  par(mfrow = c(2, 1))

  for (s in names(plot_data)) {
    series_data_concordant <- plot_data[[s]][["concordant"]][[sequence_name]]
    series_data_discordant <- plot_data[[s]][["discordant"]][[sequence_name]]

    day <- plot_data[[s]][["day"]]
    cell_line <- plot_data[[s]][["cell_line"]]
    concordant_color <- concordant_cell_line_colors[[cell_line]]
    discordant_color <- discordant_cell_line_colors[[cell_line]]

    if (day != last_day) {
      plot(series_data_concordant,
        type = "l",
        col = concordant_color,
        ylim = ylim,
        main = sprintf("Day %s", day),
        xlab = sprintf("%s position", sequence_name),
        ylab = ylab
      )
      lines(series_data_discordant,
        type = "l",
        col = discordant_cell_line_colors[[plot_data[[s]][["cell_line"]]]]
      )
    } else {
      lines(series_data_concordant,
        type = "l",
        col = concordant_color
      )
      lines(series_data_discordant,
        type = "l",
        col = discordant_color
      )
    }
    legend("top",
      legend = c(
        "Parental:Concordant",
        "P1E10:Concordant",
        "Parental:Discordant",
        "P1E10:Discordant"
      ),
      col = unlist(c(
        concordant_cell_line_colors,
        discordant_cell_line_colors
      )),
      lty = 1,
      cex = 0.75
    )
    for (vline in vlines) {
      abline(v = vline, lty = 2)
      text(
        x = vline, y = 0, as.character(vline),
        pos = 2, cex = 0.75, lwd = 0.5
      )
    }
    last_day <- day
  }
}
```

### VEGFA

```{r fig.width=10, fig.height=10}
rna_mix_rows <- rna_mixes %>% filter(grepl("VEGFA", rna_species))

for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (row in seq_len(nrow(rna_mix_rows))) {
    rna_species <- rna_mix_rows[[row, "rna_species"]]
    mix <- rna_mix_rows[[row, "exogenous_rna"]]

    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      mix = mix,
      ylab = sprintf("%s coverage (normalized to %s)", mix, norm_factor),
    )
  }
}
```

### FANCF

```{r fig.width=10, fig.height=10}
for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("FANCF", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}
```

### HEK3

```{r fig.width=10, fig.height=10}
for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("HEK3", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}
```



#### DNMT1

```{r fig.width=10, fig.height=10}
for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("DNMT1", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}
```


### RUNX1

```{r fig.width=10, fig.height=10}
for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("RUNX1", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}
```

### EMX1

```{r fig.width=10, fig.height=10}
for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("EMX1", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}
```

### RNF2

```{r fig.width=10, fig.height=10}
for (norm_factor in c("exogenous_rna_mapped_reads", "grch38_mapped_reads")) {
  for (rna_species in rna_mixes %>%
    filter(grepl("RNF2", rna_species)) %>%
    pull(rna_species)) {
    pegrna_plots(
      sequence_name = rna_species,
      normalization_factor = norm_factor,
      ylab = sprintf("Coverage (normalized to %s)", norm_factor),
    )
  }
}
```
